import os
import pandas as pd
import requests

# Download a structure file from PubChem for every spreadsheet row that has a
# usable numeric CID but no SMILES string yet.

# Load the Excel file
excel_file = "DB_API_temp_Updated.xlsx"  # Replace with your actual file name
df = pd.read_excel(excel_file)

# Create a folder to store MOL files
output_folder = "MolFiles"
os.makedirs(output_folder, exist_ok=True)

# Upper bound (seconds) on each HTTP request so that a single stalled
# connection cannot hang the whole batch.
REQUEST_TIMEOUT_SECONDS = 30


def _parse_cid(raw):
    """Return *raw* as an integer CID, or ``None`` if it is not a whole number.

    Accepts ints, digit-only strings (surrounding whitespace ignored) and
    floats with no fractional part. The float case matters because pandas
    stores a numeric column containing blank cells as float64, so a CID such
    as 2244 is read back as 2244.0. Placeholder text (e.g. "Miss_CID..."),
    NaN, negative numbers and booleans all yield ``None``.
    """
    if isinstance(raw, bool):
        return None
    if isinstance(raw, float):
        # NaN and +/-inf report is_integer() == False, so they fall out here.
        if not raw.is_integer():
            return None
        raw = int(raw)
    text = str(raw).strip()
    # isdecimal() (unlike isdigit()) only accepts characters int() can parse.
    return int(text) if text.isdecimal() else None


# Walk the two relevant columns directly; this keeps each column's own dtype
# instead of the per-row upcasting that DataFrame.iterrows() performs.
for raw_cid, smiles in zip(df["CID"], df["Smiles"]):
    cid = _parse_cid(raw_cid)

    if cid is None:
        print(f"Skipping invalid CID: {str(raw_cid).strip()}")
        continue

    if not pd.isna(smiles):
        # The CID is fine; the row simply does not need a download.
        print(f"Skipping CID {cid}: Smiles already present")
        continue

    # URL to download the 3D record. The payload is requested in SDF format;
    # it is saved with a .mol extension to keep the original file naming.
    mol_url = f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/{cid}/record/SDF/?record_type=3d&response_type=save"

    try:
        response = requests.get(mol_url, timeout=REQUEST_TIMEOUT_SECONDS)
    except requests.RequestException as exc:
        # A network problem for one compound must not abort the other rows.
        print(f"Failed to download MOL file for CID {cid}: {exc}")
        continue

    if response.status_code != 200:
        print(f"Failed to download MOL file for CID {cid}")
        continue

    mol_file_path = os.path.join(output_folder, f"{cid}.mol")
    with open(mol_file_path, "wb") as mol_file:
        mol_file.write(response.content)
    print(f"Downloaded: {cid}.mol")

print("Download process completed.")
